* Start from an empty session and interpret the script under Stata version 15 rules.
clear all
version 15

****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************
* Input and output folders. Both values end with a path separator because every
* file reference in this script is built by appending a file name directly to
* the macro (e.g. "${MY_IN_PATH}patent1.dta").
global MY_IN_PATH   "C:\replication\data\raw\"
global MY_OUT_PATH  "C:\replication\data\work\"

* Full path of the final dataset, composed from the output folder.
* NOTE(review): this macro is not referenced in the part of the script visible
* here (the save commands spell the file name out) - confirm it is used elsewhere.
global MY_OUT_FILE  ${MY_OUT_PATH}deceased_inventors_data.dta
****************************************************************************************************

**********************************************************************
* prepare raw datasets
**********************************************************************
* Each raw table is delivered in numbered parts (<name>1.dta, <name>2.dta, ...).
* Stack the parts of every table into one <name>.dta inside the input folder.

* rawinventor (parts 1-2)
use "${MY_IN_PATH}rawinventor1.dta", clear
append using "${MY_IN_PATH}rawinventor2.dta"
save "${MY_IN_PATH}rawinventor.dta", replace

* patent (parts 1-5)
use "${MY_IN_PATH}patent1.dta", clear
forvalues part = 2/5 {
	append using "${MY_IN_PATH}patent`part'.dta"
}
save "${MY_IN_PATH}patent.dta", replace

* uspatentcitation (parts 1-8)
use "${MY_IN_PATH}uspatentcitation1.dta", clear
forvalues part = 2/8 {
	append using "${MY_IN_PATH}uspatentcitation`part'.dta"
}
save "${MY_IN_PATH}uspatentcitation.dta", replace

* inventor.geo.assignee.combo.disambig (parts 1-7)
use "${MY_IN_PATH}inventor.geo.assignee.combo.disambig1.dta", clear
forvalues part = 2/7 {
	append using "${MY_IN_PATH}inventor.geo.assignee.combo.disambig`part'.dta"
}
save "${MY_IN_PATH}inventor.geo.assignee.combo.disambig.dta", replace

* persistent_inventor_disambig
* unzipfile extracts into the current working directory, while the extracted
* .tsv is later imported from the input folder. Extract inside the input folder
* and afterwards return to the directory the script was started from.
* NOTE(review): assumes the archive stores the .tsv at its top level - confirm.
local start_dir "`c(pwd)'"
quietly cd "${MY_IN_PATH}"
unzipfile "${MY_IN_PATH}persistent_inventor_disambig.zip", replace
quietly cd "`start_dir'"
**********************************************************************

**********************************************************************
* prepare SimpleMaps data
**********************************************************************
* Import the city list and bring all name fields to the same spelling
* convention (proper case, no leading/trailing blanks).
import delimited "${MY_IN_PATH}uscities.csv", clear
foreach v of varlist city city_ascii city_alt state_name county_name county_name_all {
	replace `v' = trim(proper(`v'))
}
save "${MY_OUT_PATH}uscities.dta", replace

* --- lookup table keyed on (state_id, city_ascii) ---
* identify duplications in state_id city_ascii
bysort state_id city_ascii: gen count_city_state = _N
bysort state_id city_ascii: gen help = 1 if _n==1
* one line per state/city group: how many groups have 1, 2, ... entries
tab count_city_state if help == 1
* Within a duplicated state/city group keep only the row that sorts last on
* population (Stata sorts missing values last) and record the group size in
* choice; unique state/city pairs keep choice missing.
bysort state_id city_ascii (population): gen choice = count_city_state if _n == _N & count_city_state > 1
drop if choice == . & count_city_state > 1
label variable choice "number of possible matches per state/city"
drop help count_city_state
gen ID = _n
save "${MY_OUT_PATH}uscities_merge.dta", replace

* --- lookup table keyed on (state_id, city_alt) ---
* "clear" is the documented option of -use- (the original passed "replace").
use "${MY_OUT_PATH}uscities.dta", clear
drop if city_alt == ""
* identify duplications in state_id city_alt (same procedure as above)
bysort state_id city_alt: gen count_city_state = _N
bysort state_id city_alt: gen help = 1 if _n==1
tab count_city_state if help == 1
bysort state_id city_alt (population): gen choice = count_city_state if _n == _N & count_city_state > 1
drop if choice == . & count_city_state > 1
label variable choice "number of possible matches per state/city"
drop help count_city_state
gen ID_alt = _n
drop city_ascii
save "${MY_OUT_PATH}uscities_merge_alt.dta", replace
**********************************************************************

**********************************************************************
* prepare Kaltenberg/Jaffe/Lachman data
**********************************************************************
* Step 1: one birth year per inventor id of the age file.
* The tie-break is made explicit (lowest birthyear, missing values sort last)
* so that repeated runs keep the same row when an id occurs more than once.
import delimited using "${MY_IN_PATH}inventor_age_score_gender.csv", varn(1) clear
keep inventor_id birthyear
bysort inventor_id (birthyear): keep if _n == 1
compress
save "${MY_OUT_PATH}inv_age.dta", replace

* Step 2: crosswalk raw inventor records -> 20180528 disambiguation id.
import delimited "${MY_IN_PATH}persistent_inventor_disambig.tsv", varn(1) clear 
keep rawinventor_id disamb_inventor_id_20180528
* one row per raw record; taking the last row in sort order is deterministic
* and prefers a non-empty id over an empty one (empty strings sort first)
bysort rawinventor_id (disamb_inventor_id_20180528): keep if _n == _N
ren rawinventor_id uuid
* attach the inventor_id carried by rawinventor.dta; keep matched records only
merge 1:1 uuid using "${MY_IN_PATH}rawinventor.dta", keep(match) nogenerate

* Step 3: reduce to one 20180528 id per current inventor_id.
bysort inventor_id disamb_inventor_id_20180528: keep if _n == 1
ren disamb_inventor_id_20180528 inventor_id_20180528 
drop if inventor_id_20180528 == ""
* if several 20180528 ids remain, keep the first one in sort order
bysort inventor_id (inventor_id_20180528): keep if _n == 1

* Step 4: look up the birth year through the 20180528 id and store it under
* the current inventor_id. Several current ids can share one 20180528 id,
* hence the many-to-one merge.
ren inventor_id inventor_id_202012
ren inventor_id_20180528 inventor_id
merge m:1 inventor_id using "${MY_OUT_PATH}inv_age.dta", keep(match) nogenerate
drop inventor_id
ren inventor_id_202012 inventor_id
keep inventor_id birthyear 
compress
save "${MY_OUT_PATH}inv_age.dta", replace
**********************************************************************

**********************************************************************
* prepare main data
**********************************************************************
use "${MY_IN_PATH}inventor.geo.assignee.combo.disambig.dta", clear 
**********************************************************************
* cleaning
**********************************************************************
* delete irrelevant variables
drop inventor_id inventor_idx county fips fips_state fips_county latitude longitude zipcode 
* delete irrelevant patents (like design patents etc.):
* destring with force turns every non-numeric patno into missing
destring(patno), gen(help) force
drop if help == .
drop help

* adaptation of the variable names and labels
rename patno patent
rename application_date appl_date
label variable patent "patent number"
label variable name "inventor name"
label variable lastname "lastname of inventor"
label variable firstname "firstname of inventor"
label variable country "inventor country"
label variable state "inventor state"
label variable city "inventor city"

* adaptation of the spelling: names and cities in proper case, country and
* state codes in upper case, all without surrounding blanks
foreach v of varlist name lastname firstname city {
	replace `v' = trim(proper(`v'))
}
foreach v of varlist country state {
	replace `v' = trim(upper(`v'))
}
replace country = "US" if country == "USA"

* time variables
* convert the YMD strings to Stata daily dates; a missing date is filled with
* another date of the same patent (missing values sort last within patent)
gen appl_date2 = date(appl_date,"YMD")
drop appl_date
rename appl_date2 appl_date
format appl_date %td
label variable appl_date "application date"
bysort patent (appl_date): replace appl_date = appl_date[_n-1] if appl_date == .
gen grant_date2 = date(grant_date,"YMD")
drop grant_date
rename grant_date2 grant_date
format grant_date %td
label variable grant_date "granting date"
bysort patent (grant_date): replace grant_date = grant_date[_n-1] if grant_date == .
gen appl_year = year(appl_date)
gen grant_year = year(grant_date)
label variable appl_year "application year"
label variable grant_year "granting year"
* Discard application dates outside 1900-2018. The date has to be cleared
* BEFORE the year: once appl_year is missing, "appl_year < 1900" is false, so
* clearing the year first would leave the implausible date in place.
replace appl_date = . if !inrange(appl_year, 1900, 2018)
replace appl_year = . if !inrange(appl_year, 1900, 2018)
* manual corrections of individual application dates
replace appl_date = td(23apr1997) if patent == "6044045"
replace appl_year = 1997 if patent == "6044045"
replace appl_date = td(23apr1997) if patent == "6143435"
replace appl_year = 1997 if patent == "6143435"
replace appl_date = td(12sep2012) if patent == "8690056"
replace appl_year = 2012 if patent == "8690056"
* store patent-level lookup tables of application and grant dates
preserve
keep patent appl_date
duplicates drop
save "${MY_OUT_PATH}appl_date.dta", replace
restore
preserve
keep patent grant_date
duplicates drop
save "${MY_OUT_PATH}grant_date.dta", replace
restore

* sequence (non-numeric values become missing)
destring(seq), gen(sequence) force
drop seq
label variable sequence "inventor sequence"
**********************************************************************

**********************************************************************
* deceased inventors
**********************************************************************
*****
* identify deceased inventors
*****
* Marker 1: the word "Deceased" appears in any of the three name fields.
gen deceased1 = strpos(name,"Deceased")>0 | strpos(lastname,"Deceased")>0 | strpos(firstname,"Deceased")>0
label variable deceased1 "deceased inventor"
bysort patent: egen deceased1_patent = total(deceased1)
label variable deceased1_patent "number of deceased inventors on patent"
* Marker 2: the city field contains "Late Of" (only where marker 1 is not set).
gen deceased2 = strpos(city,"Late Of") > 0 & deceased1 == 0
label variable deceased2 "deceased inventor"
bysort patent: egen deceased2_patent = total(deceased2)
label variable deceased2_patent "number of deceased inventors on patent"
* Combined flags: marker 1 takes precedence, marker 2 is the fallback - at
* inventor level and, separately, for the patent-level counts.
gen deceased = cond(deceased1 == 0, deceased2, deceased1)
label variable deceased "deceased inventor"
gen deceased_patent = cond(deceased1_patent == 0, deceased2_patent, deceased1_patent)
label variable deceased_patent "number of deceased inventors on patent"

*****
* cleaning
*****
* inventor name: strip the "Deceased" marker (substitutions are applied from
* the innermost call outwards, i.e. in the order listed)
replace lastname = subinstr(subinstr(subinstr(subinstr(lastname, ///
	", Deceased", "", .), ///
	",Deceased", "", .), ///
	" (Deceased)", "", .), ///
	" Deceased", "", .)
replace firstname = subinstr(subinstr(subinstr(subinstr(firstname, ///
	"Deceased, ", "", .), ///
	"Deceased; ", "", .), ///
	"Deceased ", "", .), ///
	", Deceased", "", .)

* inventor city: strip the "Late Of" marker and restore proper case
replace city = proper(subinstr(subinstr(subinstr(city, ///
	"Late Of ", "", .), ///
	"Late Of, ", "", .), ///
	"Late Of", "", .))

* drop all representatives
* A record is flagged when its name contains one of the markers below, the
* patent has at least one deceased inventor and the record itself is not
* flagged as deceased. (A positive strpos() already implies a non-empty name.)
gen representative = 0
local eligible "deceased_patent > 0 & deceased == 0"
foreach marker in ///
	"Executor" "Executrix" "Executix" "Exectrix" "Executive" ///
	"Coexecutor" "Co-Executor" "Coexecutrix" ///
	"Administrator" "Administator" "Adminstrator" ///
	"Administratrix" "Aministratrix" "Adminstratrix" "Administratix" ///
	"Representative" "Represenative" "Representive" "Representatvie" "Representitive" ///
	", Trustee;" ", Successor Trustee;" ", Co-Successor Trustee;" "Successor" ///
	"Legatee" "Beneficiary" "Surviving Spouse" ", Widow" ", (Widow Of" ///
	", Heir;" ", Heirs;" ", Heiress;" "Legal Heir" "All Legar Heirs" "Sole Heir" ///
	"Heir At Law" "Heir-At-Law" "Heirs-At-Law" "Heiress-At-Law" "Legal Authorized Heir" ///
	"Inheritor" "Legal Guardian" ", Guardian;" "Assignee Of" "Represented By" ///
	"Legaless" "Legal Represenative" "Legal Respresentative" "Legal Representative" {
	replace representative = 1 if strpos(name,"`marker'") > 0 & `eligible'
}
* markers that begin or end with a blank are written out literally
replace representative = 1 if strpos(name," Agent; By") > 0 & `eligible'
replace representative = 1 if strpos(name," By ") > 0 & `eligible'
drop if representative ~= 0
* manual corrections for individual patents
drop if patent == "4282794" & name == "Miller, Deceased; By Roberta L."
replace deceased1_patent = 1 if patent == "4282794"
replace deceased_patent = 1 if patent == "4282794"
drop if patent == "7053344" & name == "Chiovatero; By Antoinette"
replace deceased = 1 if patent == "7053344" & name == "Surjan; James"
replace deceased_patent = 1 if patent == "7053344" 
drop if patent == "4413583" & name == "Elling, Executrix; By Hanny M."
drop if patent == "5441769" & name == "Ross, Legal Representative; By Lorna O."
drop representative

* sequence: manual corrections for one patent
replace sequence = 0 if patent =="4415550" & name == "Pakhomov; Gennady N."
replace sequence = 7 if patent =="4415550" & name == "Konstantinov; Deceased; Nikolai A."
**********************************************************************
* snapshot of all remaining inventor records, with a running row index
preserve
keep patent lastname firstname country state city appl_date grant_date
gen index = _n
save "${MY_OUT_PATH}citing.dta", replace
restore
**********************************************************************

**********************************************************************
* Restrict dataset
**********************************************************************
* deceased inventors: only patents with at least one deceased inventor
keep if deceased_patent > 0

* deceased inventor living in the US: keep a patent only when every deceased
* inventor counted on it is a deceased record with country "US"
tempvar n_dec_us
bysort patent: egen `n_dec_us' = total(deceased == 1 & country == "US")
keep if deceased_patent == `n_dec_us'
drop `n_dec_us'

* more than one inventor
bysort patent: gen count_inv = _N
label variable count_inv "number of inventors"
keep if count_inv > 1

* US inventors: count inventors abroad per patent and derive the US counts
tempvar n_abroad
bysort patent: egen `n_abroad' = total(country ~= "US")
gen all_US = (`n_abroad' == 0)
gen count_non_US = `n_abroad'
gen count_US = count_inv - count_non_US
* drop the helper explicitly so it is not written to any saved file
drop `n_abroad'
label variable all_US "1 if all inventors live in the US"
label variable count_non_US "number of inventors living abroad"
label variable count_US "number of inventors living in the US"
keep if count_US >= 2

*****
* Location
*****
* cleaning: expand abbreviations so that the city names can match the city list
replace city = subinstr(city,"Mt.","Mount",.)
replace city = subinstr(city,"Ft.","Fort",.)
replace city = subinstr(city,"Hts.","Heights",.)
replace city = subinstr(city,"W.","West",.)
replace city = subinstr(city,"N.","North",.)
replace city = subinstr(city,"E.","East",.)
replace city = subinstr(city,"S.","South",.)
* manual location corrections for individual inventors
replace city = "Sumner" if patent == "5543082" & name == "Brecht; Doris J."
replace state = "MI" if patent == "5543082" & name == "Brecht; Doris J."
replace city = "Sanford" if patent == "5543082" & name == "Mcgee; James B."
replace state = "MI" if patent == "5543082" & name == "Mcgee; James B."
replace city = "Bay City" if patent == "5543082" & name == "Petroff; Lenin J."
replace state = "MI" if patent == "5543082" & name == "Petroff; Lenin J."
* NOTE(review): the three cities below are paired with state "MI"; if
* Minnesota ("MN") was intended, these rows cannot match the city list on
* state_id - confirm against the patent document before changing.
replace city = "Minneapolis" if patent == "5293595" & name == "Caldarale; Charles R."
replace state = "MI" if patent == "5293595" & name == "Caldarale; Charles R."
replace city = "White Bear Lake" if patent == "5293595" & name == "Hancock; Peter J."
replace state = "MI" if patent == "5293595" & name == "Hancock; Peter J."
replace city = "Roseville" if patent == "5293595" & name == "Dudda, Deceased; Klaus G."
replace state = "MI" if patent == "5293595" & name == "Dudda, Deceased; Klaus G."
replace city = "Cranford" if patent == "4092413" & name == "Wood, Jr., Deceased; Sumner"
replace state = "NJ" if patent == "4092413" & name == "Wood, Jr., Deceased; Sumner"

* SimpleMaps
* Pass 1: match on (state, city) against the city_ascii lookup table.
rename city city_ascii
rename state state_id
merge m:1 state_id city_ascii using "${MY_OUT_PATH}uscities_merge.dta", keepusing(lat lng choice) keep(1 3) nogen
rename city_ascii city
* Pass 2: records still without coordinates are matched on the alternative
* city name; results of pass 1 are parked in lat1/lng1/choice1 meanwhile.
gen city_alt = city if lat == .
rename lat lat1
rename lng lng1 
rename choice choice1
merge m:1 state_id city_alt using "${MY_OUT_PATH}uscities_merge_alt.dta", keepusing(lat lng choice) keep(1 3) nogen
replace lat1 = lat if lat1 == .
replace lng1 = lng if lng1 == .
* carry over the pass-2 value of choice as well (the original statement
* assigned choice to itself and therefore discarded it)
replace choice1 = choice if choice1 == .
drop lat lng choice
rename lat1 lat
rename lng1 lng 
rename choice1 choice
drop city_alt
rename state_id state
rename lat latitude
rename lng longitude
label variable latitude "latitude"
label variable longitude "longitude"

* city spelling correction
* Pass 3: for records still without coordinates, look up a corrected
* city/state pair (city_google, state_google) and match that pair against the
* city_ascii lookup table once more.
gen city_org = city if latitude == . 
gen state_org = state if latitude == .
merge m:1 state_org city_org using "${MY_IN_PATH}city_spelling_correction.dta", keep(1 3) nogen
drop city_org state_org
rename city_google city_ascii
rename state_google state_id
rename choice choice1
merge m:1 state_id city_ascii using "${MY_OUT_PATH}uscities_merge.dta", keepusing(lat lng choice) keep(1 3) nogen
replace latitude = lat if latitude == .
replace longitude = lng if longitude == .
drop lat lng
replace choice1 = choice if choice1 == .
drop choice
rename choice1 choice
rename state_id state_google
rename city_ascii city_google
gen google = 1 if city_google ~= ""
label variable google "Google city spelling correction"

* drop patent if we do not have location information for all US inventors
gen help = 1 if latitude == . & country == "US"
bysort patent: egen help2 = total(help) 
drop if help2 > 0
drop help help2

compress
save "${MY_OUT_PATH}deceased_inventors_data.dta", replace
**********************************************************************

**********************************************************************
* Prepare patentsview dataset
**********************************************************************
* patent/inventor with a deceased inventor
drop id msa geo pdpass appl_date grant_date appl_year grant_year deceased1 deceased1_patent deceased2 deceased2_patent
keep if deceased_patent > 0
sort patent sequence
save "${MY_OUT_PATH}deceased_SL.dta", replace

* patents with a deceased inventor (one row per patent)
keep patent
duplicates drop
save "${MY_OUT_PATH}deceased_patent_SL.dta", replace

* combine datasets
* "clear" added for consistency with every other -use- in this script, so the
* load does not depend on the state of the data currently in memory.
use "${MY_IN_PATH}patent.dta", clear
rename number patent_id
drop if patent_id == ""
* patent -> patent/inventor links (unmatched rows from both sides are kept here)
merge 1:m patent_id using "${MY_IN_PATH}patent_inventor.dta", nogenerate
rename id record_id
* attach location attributes; location.dta is keyed on id
rename location_id id
merge m:1 id using "${MY_IN_PATH}location.dta", keep(master match) nogenerate
rename id location_id
* attach inventor attributes; inventor.dta is keyed on id
rename inventor_id id
merge m:1 id using "${MY_IN_PATH}inventor.dta", keep(master match) nogenerate
rename id inventor_id
drop if inventor_id == ""
* attach sequence and deceased flag from the raw inventor records
merge 1:1 patent_id inventor_id using "${MY_IN_PATH}rawinventor.dta", keepusing(sequence deceased) keep(1 3) nogenerate
compress
save "${MY_OUT_PATH}combined.dta", replace

* reduce to the fields needed for the fuzzy match and strip the "Deceased"
* marker from the names (substitutions run from the innermost call outwards)
keep patent_id country inventor_id city state name_first name_last sequence deceased
replace name_first = proper(name_first)
replace name_last = subinstr(subinstr(subinstr(subinstr(subinstr(proper(name_last), ///
	", Deceased","",.), ///
	" Deceased","",.), ///
	", (Deceased)","",.), ///
	" (Deceased)","",.), ///
	",Deceased","",.)
replace name_first = subinstr(subinstr(subinstr(name_first, ///
	", Deceased","",.), ///
	"Deceased ","",.), ///
	"Deceased, ","",.)
replace name_first = "" if name_first == "Deceased"
* rename to the names used on the master side; the _PV suffix marks fields
* that would otherwise clash with master variables
rename name_first firstname
rename name_last lastname
rename patent_id patent
rename country country_PV
rename city city_PV
rename state state_PV
rename sequence sequence_PV
rename deceased deceased_PV
save "${MY_OUT_PATH}inventor_id_fuzzy_match.dta", replace

* identify patents with a deceased inventor in patentsview for fuzzy match
use "${MY_OUT_PATH}inventor_id_fuzzy_match.dta", clear
merge m:1 patent using "${MY_OUT_PATH}deceased_patent_SL.dta", keep(3) nogen
sort patent sequence_PV
gen index = _n

* cleaning umlauts and other accented characters
* Each line replaces one exact, mis-decoded spelling by a plain-ASCII one.
* Statements that repeated an identical mapping have been removed: an exact
* match can only fire once, so the repeats were no-ops.
* NOTE(review): some targets use an escaped notation (".O Slashed.", ".Ang.")
* instead of a plain letter - presumably to agree with the spelling in the
* master data; confirm.
replace lastname = "Wiedenhofer" if lastname == "WiedenhÃ¶Fer"
replace lastname = "Gotz" if lastname == "GÃ¶Tz"
replace lastname = "Doring" if lastname == "DÃ¶Ring"
replace lastname = "Koppen" if lastname == "KÃ¶Ppen"
replace lastname = "Rosler" if lastname == "RÃ¶Sler"
replace lastname = "Muller" if lastname == "MÃ¼Ller"
replace lastname = "Orlamunder" if lastname == "OrlamÃ¼Nder"
replace lastname = "Roll" if lastname == "RÃ¶Ll"
replace lastname = "Stuwe" if lastname == "StÃ¼We"
replace lastname = "Heikkila" if lastname == "HeikkilÃ¤"
replace lastname = "Lupke" if lastname == "LÃ¼Pke"
replace lastname = "Pochmuller" if lastname == "PochmÃ¼Ller"
replace lastname = "Grun" if lastname == "GrÃ¼N"
replace lastname = "Dollgast" if lastname == "DÃ¶Llgast"
replace lastname = "Stockmann" if lastname == "StÃ¶Ckmann"
replace lastname = "Nagele" if lastname == "NÃ¤Gele"
replace lastname = "Muhl" if lastname == "MÃ¼Hl"
replace lastname = "Jonsson" if lastname == "JÃ¶Nsson"
replace lastname = "Kohler" if lastname == "KÃ¶Hler"
replace lastname = "Glanzel" if lastname == "GlÃ¤Nzel"
replace lastname = "Stumper" if lastname == "StÃ¼Mper"
replace lastname = "Bladel" if lastname == "BlÃ¤Del"
replace lastname = "Lohr" if lastname == "LÃ¶Hr"
replace lastname = "Grobmair" if lastname == "GrÃ¶Bmair"
replace lastname = "Hogl" if lastname == "HÃ¶Gl"
replace lastname = "Kobe" if lastname == "KÃ¶Be"
replace lastname = "Lummen" if lastname == "LÃ¼Mmen"
replace lastname = "Schr.O Slashed.Der Glad" if lastname == "SchrÃ¸Der Glad"
replace lastname = "Sjostedt" if lastname == "SjÃ¶Stedt"
replace lastname = "Bj.O Slashed.Rnvad" if lastname == "BjÃ¸Rnvad"
replace lastname = "Kramer" if lastname == "KrÃ¤Mer"
replace lastname = "Schulein" if lastname == "SchÃ¼Lein"
replace lastname = "Fulber" if lastname == "FÃ¼Lber"
replace lastname = "Kasmaier" if lastname == "KÃ¤Smaier"
replace lastname = "Dunninger" if lastname == "DÃ¼Nninger"
replace lastname = "Stohr" if lastname == "StÃ¶Hr"
replace lastname = "Str.Ang.Lin" if lastname == "StrÃ¥Lin"
replace lastname = "Bomer" if lastname == "BÃ¶Mer"
replace lastname = "Bjorklund" if lastname == "BjÃ¶Rklund"
replace lastname = "Warme" if lastname == "WÃ¤Rme"
replace lastname = "Wikstrom" if lastname == "WikstrÃ¶M"
replace lastname = "Schongen" if lastname == "SchÃ¶Ngen"
replace lastname = "Allworden" if lastname == "AllwÃ¶Rden"
replace lastname = "Mockel" if lastname == "MÃ¶Ckel"
replace lastname = "Grunewald" if lastname == "GrÃ¼Newald"
replace lastname = "Niewohner" if lastname == "NiewÃ¶Hner"
replace lastname = "Bacque" if lastname == "BacquÃ©"
replace lastname = "Follinger" if lastname == "FÃ¶Llinger"
replace lastname = "Schwogler" if lastname == "SchwÃ¶Gler"
replace lastname = "Horneschemeyer" if lastname == "HÃ¶Rneschemeyer"
replace lastname = "Hugenschutt" if lastname == "HugenschÃ¼Tt"
replace lastname = "Erguden" if lastname == "ErgÃ¼Den"
replace lastname = "Bruckner" if lastname == "BrÃ¼Ckner"
replace lastname = "Kruper" if lastname == "KrÃ¼Per"
replace lastname = "Hoglin" if lastname == "HÃ¶Glin"
replace lastname = "Konig" if lastname == "KÃ¶Nig"
replace lastname = "Boss" if lastname == "BÃ¶Ss"
replace lastname = "Hafker" if lastname == "HÃ¤Fker"
replace lastname = "Fusslein" if lastname == "FÃ¼Sslein"
replace lastname = "Hock" if lastname == "HÃ¶Ck"
replace lastname = "Sporing" if lastname == "SpÃ¶Ring"
replace lastname = "Deverin" if lastname == "DÃ©Verin"
replace lastname = "Lundstrom" if lastname == "LundstrÃ¶M"
replace lastname = "Lofstrom" if lastname == "LÃ¶FstrÃ¶M"
replace lastname = "Kruger" if lastname == "KrÃ¶Ger"
replace lastname = "Goricke" if lastname == "GÃ¶Ricke"
replace lastname = "Jorgedal" if lastname == "JÃ¸Rgedal"
replace lastname = "Schroder" if lastname == "SchrÃ¶Der"
replace lastname = "Ostlund" if lastname == "ÃStlund"
* further exact-match corrections of mis-decoded last names
replace lastname = "Lonnqvist" if lastname == "LÃ¶Nnqvist"
replace lastname = "Ella" if lastname == "EllÃ¤"
replace lastname = "Schulein" if lastname == "SchÃ¼Lein"
* NOTE(review): an earlier statement in this script already rewrites the same
* source spelling to "Bj.O Slashed.Rnvad", so the next line finds nothing left
* to replace - confirm which target spelling is intended.
replace lastname = "Bjornvad" if lastname == "BjÃ¸Rnvad"
replace lastname = "Helmstadter" if lastname == "HelmstÃ¤Dter"
replace lastname = "Jager" if lastname == "JÃ¤Ger"
replace lastname = "Honlein" if lastname == "HÃ¶Nlein"
replace lastname = "Simburger" if lastname == "SimbÃ¼Rger"
* the same kind of exact-match corrections, now for first names
replace firstname = "Zoltan" if firstname == "ZoltÃ¡N"
replace firstname = "Jose" if firstname == "JosÃ©"
replace firstname = "Hans-Jorg" if firstname == "Hans-JÃ¶Rg"
replace firstname = ".Ang.Ke" if firstname == "ÃKe"
* Exact-match corrections of mis-decoded first names.
* Statements repeating an identical mapping have been removed (an exact match
* can only fire once). Two conflicting mappings were removed as well because
* an earlier line already consumes their source spelling, so they never fired:
*   "GÃ¼Nter"         -> "Ingeborg Genoveva"  (source is rewritten to "Gunter")
*   "Sanne SchrÃ¸Der" -> "Sanne O Schroder"   (source is rewritten to "Sanne O. Schroder")
* NOTE(review): if those two were meant for individual patents only, they need
* an additional patent condition and must run before the general mapping.
replace firstname = "Hans-Jorg" if firstname == "Hans-JÃ¶Rg"
replace firstname = "Robert Hans-Jorg" if firstname == "Robert Hans-JÃ¶Rg"
replace firstname = "B.Ang.Rd" if firstname == "BÃ¥Rd"
replace firstname = "Jurgen" if firstname == "JÃ¼Rgen"
replace firstname = "Gunther" if firstname == "GÃ¼Nther"
replace firstname = "Gunther Karl" if firstname == "GÃ¼Nther Karl"
replace firstname = "Joel" if firstname == "JoÃ«L"
replace firstname = "Gunter" if firstname == "GÃºNter"
replace firstname = "Jorg" if firstname == "JÃ¶Rg"
replace firstname = "Sven Anders Gosta" if firstname == "Sven Anders GÃ¶Sta"
replace firstname = "Gunter" if firstname == "GÃ¼Nter"
replace firstname = "Fran.Cedilla.Ois" if firstname == "FranÃ§Ois"
replace firstname = "Gerard" if firstname == "GÃ©Rard"
replace firstname = "Jerome" if firstname == "JÃ©RÃ´Me"
replace firstname = "Monica Mascato" if firstname == "MÃ³Nica Mascato"
replace firstname = "Goran" if firstname == "GÃ¶Ran"
replace firstname = "Andre" if firstname == "AndrÃ©"
replace firstname = "Sanne O. Schroder" if firstname == "Sanne SchrÃ¸Der"
replace firstname = "Raphael" if firstname == "RaphaÃ«L"
replace firstname = "S.O Slashed.Ren Flensted" if firstname == "SÃ¸Ren Flensted"
replace firstname = "Jenone" if firstname == "JenÃ¶Me"
replace firstname = "Frederik Frans Desire" if firstname == "Frederik Frans DesirÃ©"
replace firstname = "Jerome Emile Georges" if firstname == "JÃ©RÃ´Me Emile Georges"
replace firstname = "Hans-Jurgen" if firstname == "Hans-JÃ¼Rgen"
replace firstname = "Rene" if firstname == "RenÃ©"
replace firstname = "Daniel" if firstname == "DaniÃ«L"
replace firstname = "Janos" if firstname == "JÃ¡Nos"
replace firstname = "Tamas" if firstname == "TamÃ¡S"
replace firstname = "Ozkan" if firstname == "ÃZkan"
* remaining exact-match corrections of mis-decoded first names
replace firstname = "Jurgen" if firstname == "JÃ¼Rgen"
replace firstname = "Gunter" if firstname == "GÃ¼Nter"
replace firstname = "Sanne O. Schroder" if firstname == "Sanne SchrÃ¸Der"
replace firstname = "Gunter" if firstname == "GÃ¼Nter"
replace firstname = "Jorg" if firstname == "JÃ¶Rg"
replace firstname = "Frederic" if firstname == "FrÃ©DÃ©Ric"
* store the cleaned PatentsView records that serve as the "using" side of the fuzzy match
save "${MY_OUT_PATH}inventor_id_fuzzy_match_deceased.dta", replace
**********************************************************************

**********************************************************************
* Merge inventor id
**********************************************************************
* Reload the deceased-inventor records and rebuild a running row number; it is
* handed to reclink as the master-side identifier.
use "${MY_OUT_PATH}deceased_inventors_data.dta", clear
sort patent sequence
drop id
generate id = _n

* Fuzzy Match
* Names are linked within the same patent only (required(patent)); minscore(0)
* keeps every candidate so that the score can be inspected further below.
reclink patent lastname firstname using "${MY_OUT_PATH}inventor_id_fuzzy_match_deceased.dta", ///
    idmaster(id) idusing(index) gen(score) required(patent) _merge(_merge_fuzzy) minscore(0)

* more than one match (manually cleaning)
* count = number of rows sharing patent, first name and last name
bysort patent firstname lastname: generate count = _N
tabulate count
list patent Upatent score lastname Ulastname firstname Ufirstname city city_PV deceased deceased_PV inventor_id count if count == 2

* -- surplus candidates, told apart by the PatentsView deceased flag or city
drop if patent == "4760835" & lastname == "Paulson" & firstname == "John K." & deceased_PV == "FALSE"
drop if patent == "4581410" & lastname == "Martins" & firstname == "Joseph G." & city_PV == "Woburn"
drop if patent == "5745575" & lastname == "Otto" & firstname == "William F." & city_PV == "Moorpark"
drop if patent == "7238008" & lastname == "Plemmons" & firstname == "Lawrence Wayne" & city_PV == "Mauldin"
drop if patent == "4992545" & lastname == "Hall" & firstname == "David A." & city_PV == ""
drop if patent == "5098918" & lastname == "Rynbrandt" & firstname == "Ronald H." & city_PV == ""

* -- surplus candidates, told apart by the inventor sequence number
drop if patent == "5067513" & lastname == "Nicklas" & firstname == "James R." & sequence == 3
drop if patent == "5425136" & lastname == "Bangs" & firstname == "William J." & sequence == 3
drop if patent == "6307336" & lastname == "Goff" & firstname == "Kenneth W." & sequence == 2
drop if patent == "6971538" & lastname == "Luedecke" & firstname == "John E." & sequence == 1
drop if patent == "6918956" & lastname == "Kurple" & firstname == "Karl Vincent" & sequence_PV == 1

* -- representatives (each condition needs to be applied only once)
drop if inlist(patent, "6556052", "6870419") & lastname == "Dillon" & firstname == "Nancy David" // representative
drop if patent == "6774614" & lastname == "Miske" & firstname == "Carol Conde" // representative
drop if patent == "6830875" & lastname == "Cangelosi" & firstname == "Joan" // representative
drop if patent == "6861441" & lastname == "Doughty" & firstname == "Jennifer M" // representative
drop if patent == "6914069" & lastname == "Kiernan" & firstname == "Susan A." // representative

* -- placeholder rows without a name
drop if inlist(patent, "4992545", "5098918") & lastname == "N/A" & firstname == "N/A"
drop count

* recalculate number of inventors
* The manual cleaning removed rows, so the per-patent counters are rebuilt.
bysort patent: replace count_inv = _N
drop if count_inv <= 1
tempvar n_foreign
bysort patent: egen `n_foreign' = total(country != "US") // rows with a non-US country code
replace all_US = (`n_foreign' == 0)
replace count_non_US = `n_foreign'
replace count_US = count_inv - count_non_US
drop `n_foreign'
drop if count_US < 2

* score
* The three bands at or above 0.7 are printed for visual inspection only.
list patent Upatent score lastname Ulastname firstname Ufirstname if score >= 0.9 & score < 1 // manually checked
list patent Upatent score lastname Ulastname firstname Ufirstname if score >= 0.8 & score < 0.9 // manually checked (first names missing)
list patent Upatent score lastname Ulastname firstname Ufirstname if score >= 0.7 & score < 0.8 // manually checked
* Matches scoring below 0.7 are rejected by blanking the inventor id.
replace inventor_id = "" if score < 0.7 // 65 changes

* drop irrelevant variables
drop Upatent Ulastname Ufirstname id score index country_PV city_PV state_PV sequence_PV deceased_PV _merge_fuzzy

label variable inventor_id "inventor id (patentsview)"

* inventors identified
* count_no_ID: rows of the patent that carry no inventor id
bysort patent: egen count_no_ID = total(inventor_id == "")
label variable count_no_ID "number of not identified inventors"
* Defined on deceased rows only; every other row is left missing.
generate deceased_identified = (inventor_id != "") if deceased == 1
label variable deceased_identified "1 if deceased inventor is identified, 0 otherwise"
**********************************************************************

**********************************************************************
* Only patents with exactly one deceased inventor
**********************************************************************
* Retain rows whose deceased_patent value is at most 1 (rows with a missing
* value are removed as well, because missing compares as larger than 1).
keep if deceased_patent <= 1

* Within each patent the deceased inventor is placed first, the remaining rows
* follow by last name, first name and sequence; seq_id numbers that order.
gsort patent -deceased lastname firstname sequence
by patent: generate seq_id = _n
save "${MY_OUT_PATH}deceased_inventors_data.dta", replace
**********************************************************************

**********************************************************************
* Patent information
**********************************************************************
* Collapse the inventor-level file to one row per patent with patent-level facts.
use "${MY_OUT_PATH}deceased_inventors_data.dta", clear
keep patent appl_date grant_date appl_year grant_year deceased_patent count_inv all_US count_non_US count_US count_no_ID deceased_identified

* Turn the row-level flag into a patent-level value by summing it within patent.
rename deceased_identified identified_row
bysort patent: egen deceased_identified = total(identified_row)
drop identified_row
label variable deceased_identified  "1 if deceased inventor has an inventor_id, 0 otherwise"

* All remaining variables are constant within patent, so exact duplicates go.
duplicates drop
order patent appl_date grant_date grant_year deceased_patent count_inv count_non_US count_US all_US count_no_ID deceased_identified
save "${MY_OUT_PATH}patent_information.dta", replace
**********************************************************************

**********************************************************************
* Inventor information (all inventors patentsview) (takes several hours to compute seniority)
**********************************************************************
* Builds, for every patent-inventor pair, the time since the inventor's first
* application and running / windowed counts of the inventor's applications.
use "${MY_OUT_PATH}combined.dta", clear
keep patent_id inventor_id
drop if inventor_id == ""

* delete irrelevant patents (like design patents etc.)
* patent ids that cannot be converted to a number are removed
destring(patent_id), gen(help) force
drop if help == .
drop help

* time variables
* application date
merge m:1 patent_id using "${MY_IN_PATH}application.dta", keep(1 3) keepusing(date) nogen
gen appl_date = date(date,"YMD")
drop date
format appl_date %td
* grant date (the patent file is keyed on "number")
rename patent_id number
merge m:1 number using "${MY_IN_PATH}patent.dta", keep(1 3) keepusing(date) nogen
rename number patent_id
gen grant_date = date(date,"YMD")
drop date
format grant_date %td

*time to first patent application
bysort inventor_id (appl_date patent_id): gen help = appl_date[1]
gen time_to_first_appl = appl_date - help
label variable time_to_first_appl "time to first patent application"
drop help

* number of current patent applications
* running count in (appl_date patent_id) order; undefined without a date
gen help = 1
bysort inventor_id (appl_date patent_id): gen count_applications = sum(help)
label variable count_applications "number of current patent applications"
replace count_applications = . if appl_date == .
drop help

* number of patent applications within 5 and 10 years
* For each row, look back lag = 1, 2, ... rows within the same inventor and add
* one for every earlier row whose date plus the window reaches the current date.
* The largest group size bounds the number of lags that can ever match.
bysort inventor_id: gen count = _N
sum count
local max = r(max)
display `max'
gen  count_applications10 = 0
gen  count_applications5 = 0
forvalues lag = 1/`max' {
    * Fix: patent_id is used as tie-breaker (as for count_applications above), so
    * that applications filed on the same day are ordered reproducibly.
    * The inventor_id comparison must stay: a look-back that leaves the by-group
    * returns missing, and "appl_date <= missing" alone would evaluate to true.
    bysort inventor_id (appl_date patent_id): replace count_applications10 = count_applications10 + 1 ///
        if appl_date <= (appl_date[_n-`lag'] + 365.25*10) & inventor_id == inventor_id[_n-`lag']
    bysort inventor_id (appl_date patent_id): replace count_applications5 = count_applications5 + 1 ///
        if appl_date <= (appl_date[_n-`lag'] + 365.25*5) & inventor_id == inventor_id[_n-`lag']
    di `lag'
}
drop count
* NOTE(review): unlike count_applications, the two windowed counts are not set
* to missing for rows without an application date - confirm this is intended.
label variable count_applications10 "number of applications within the last 10 years"
label variable count_applications5 "number of applications within the last 5 years"
rename patent_id patent
save "${MY_OUT_PATH}inventor_information_long.dta", replace
drop appl_date grant_date
save "${MY_OUT_PATH}inventor_information.dta", replace
**********************************************************************



**********************************************************************
* Cited Citing
**********************************************************************
* Step 1: one row per patent with its inventor ids side by side
* (inventor_id1, inventor_id2, ...).
* Fix: ", clear" and ", replace" added so that the section can be re-run; all
* other use/save statements of this file already carry them.
use "${MY_IN_PATH}patent_inventor.dta", clear
keep patent_id inventor_id
bysort patent_id: gen num = _n
reshape wide inventor_id, i(patent_id) j(num)
save "${MY_OUT_PATH}patent_inventorid_wide.dta", replace

* cited citing
* keep(2 3): citations whose cited patent is in deceased_patent_SL.dta, plus the
* patents of that file that are never cited (their citing id stays empty)
use "${MY_IN_PATH}uspatentcitation.dta", clear // cited-citing patentsview
keep patent_id citation_id category
rename patent_id citing
rename citation_id cited
rename cited patent
merge m:1 patent using "${MY_OUT_PATH}deceased_patent_SL.dta", keep(2 3) nogen
rename patent cited

* id cited
rename cited patent_id
merge m:1 patent_id using "${MY_OUT_PATH}patent_inventorid_wide.dta", keep(1 3) nogen // inventor id cited
rename patent_id cited
foreach var of varlist inventor_id* { // drop variables with all missing values
    capture assert missing(`var')
    if !_rc {
        drop `var'
    }
}
foreach var of varlist inventor_id* { // rename id-variables for cited
    rename `var' cited_`var'
}

*id citing
rename citing patent_id
merge m:1 patent_id using "${MY_OUT_PATH}patent_inventorid_wide.dta", keep(1 3) nogen // inventor id citing
rename patent_id citing
foreach var of varlist inventor_id* { // drop variables with all missing values
    capture assert missing(`var')
    if !_rc {
        drop `var'
    }
}
foreach var of varlist inventor_id* { // rename id-variables for citing
    rename `var' citing_`var'
}

* calculate self cites
* A citation is a self citation when any non-empty inventor id of the cited
* patent equals any non-empty inventor id of the citing patent.
* Fix: the loops run over the id columns that actually exist instead of the
* hand-maintained limits 1/23 and 1/29.
gen self_cite = 0
foreach cited_var of varlist cited_inventor_id* {
    foreach citing_var of varlist citing_inventor_id* {
        replace self_cite = 1 if `cited_var' == `citing_var' & `cited_var' ~= "" & `citing_var' ~= ""
    }
}
drop cited_inventor_id* citing_inventor_id* 
label variable self_cite "1 if citation is a self citation, 0 otherwise"

* examiner cite
* left missing when the citation carries no category
gen examiner_cite = 0 if category ~= ""
replace examiner_cite = 1 if category == "cited by examiner"
* Fix: spelling of "examiner" in the label
label variable examiner_cite "1 if patent is cited by an examiner, 0 if cited by someone else"
drop category
sort cited citing
save "${MY_OUT_PATH}deceased_cited_citing.dta", replace // all cited-citing with a deceased inventor in the cited patent

* deceased cited
use "${MY_OUT_PATH}deceased_cited_citing.dta", clear
keep cited
duplicates drop
save "${MY_OUT_PATH}deceased_cited.dta", replace // all cited patents with a deceased inventor
* deceased citing
use "${MY_OUT_PATH}deceased_cited_citing.dta", clear
keep citing
duplicates drop
save "${MY_OUT_PATH}deceased_citing.dta", replace // all citing patents which cite a patent with a deceased inventor

**********
* Cited
**********
* One row per cited patent holding inventor id, latitude and longitude of every
* inventor in seq_id order (the sort below places the deceased inventor first).
use "${MY_OUT_PATH}deceased_inventors_data.dta", clear 
gsort patent -deceased lastname firstname sequence // dataset sorted by patent -deceased lastname firstname sequence
keep patent latitude longitude seq_id appl_date inventor_id

* numeric patent key for tsset / reshape
encode patent, generate(patent_num)
tsset patent_num seq_id

* The column order fixed here determines the order of the reshaped variables.
rename (latitude longitude inventor_id) (lat_cited lng_cited inventor_id_cited)
order inventor_id_cited lat_cited lng_cited
reshape wide lat_cited lng_cited inventor_id_cited, i(patent_num) j(seq_id)
drop patent_num
order patent
rename (patent appl_date) (cited appl_date_cited)
save "${MY_OUT_PATH}geo_cited.dta", replace
**********

**********
* Citing
**********
* Attach the inventor records of every citing patent and geocode their cities.
* Only citing patents that are found in citing.dta are kept.
use "${MY_OUT_PATH}deceased_citing.dta", clear
rename citing patent
merge 1:m patent using "${MY_OUT_PATH}citing.dta", keep(1 3)
drop if _merge == 1
drop _merge

* Spell out abbreviations in city names before matching on the city name.
* NOTE(review): the "St." -> "Saint" rule is commented out - confirm that this
* is intentional.
*replace city = subinstr(city,"St.","Saint",.)
replace city = subinstr(city,"Mt.","Mount",.)
replace city = subinstr(city,"Ft.","Fort",.)
replace city = subinstr(city,"Hts.","Heights",.)
replace city = subinstr(city,"W.","West",.)
replace city = subinstr(city,"N.","North",.)
replace city = subinstr(city,"E.","East",.)
replace city = subinstr(city,"S.","South",.)

* SimpleMaps
* First pass: match on state and city name.
rename city city_ascii
rename state state_id
merge m:1 state_id city_ascii using "${MY_OUT_PATH}uscities_merge.dta", keepusing(lat lng choice) keep(1 3) nogen
rename city_ascii city
* Second pass, for cities without coordinates only: match on the alternative
* city name. The first-pass results are parked in lat1 / lng1 / choice1.
gen city_alt = city if lat == .
rename lat lat1
rename lng lng1 
rename choice choice1
merge m:1 state_id city_alt using "${MY_OUT_PATH}uscities_merge_alt.dta", keepusing(lat lng choice) keep(1 3) nogen
replace lat1 = lat if lat1 == .
replace lng1 = lng if lng1 == .
* Fix: this used to read "replace choice = choice if choice1 == .", which
* assigns the variable to itself; the second-pass value has to fill choice1,
* exactly as lat1 and lng1 are filled above.
replace choice1 = choice if choice1 == .
drop lat lng choice
rename lat1 lat
rename lng1 lng 
rename choice1 choice
drop city_alt
rename state_id state
rename lat latitude
rename lng longitude
label variable latitude "latitude"
label variable longitude "longitude"

* city spelling correction
* Cities that still lack coordinates are looked up in the spelling-correction
* table; the corrected city/state pair is then matched against SimpleMaps again.
generate city_org = city if latitude == .
generate state_org = state if latitude == .
merge m:1 state_org city_org using "${MY_IN_PATH}city_spelling_correction.dta", keep(master match) nogenerate
drop city_org state_org

* temporarily use the key names of the SimpleMaps file; park the current choice
rename (city_google state_google choice) (city_ascii state_id choice1)
merge m:1 state_id city_ascii using "${MY_OUT_PATH}uscities_merge.dta", keepusing(lat lng choice) keep(master match) nogenerate

* fill only what is still missing
replace latitude = lat if latitude == .
replace longitude = lng if longitude == .
replace choice1 = choice if choice1 == .
drop lat lng choice

* restore the original variable names
rename (choice1 state_id city_ascii) (choice state_google city_google)
generate google = 1 if city_google != ""
label variable google "Google city spelling correction"

* drop patents when all inventors live in foreign countries
* count_inv_us = rows of the patent whose country is "US"
bysort patent: egen count_inv_us = total(country == "US")
drop if count_inv_us == 0

* drop patents if we do not have location information for all US inventors
tempvar n_unlocated
bysort patent: egen `n_unlocated' = total(latitude == . & country == "US")
drop if `n_unlocated' > 0
drop `n_unlocated'

* inventor counts of the patents that remain
bysort patent: generate count_inv = _N
generate count_inv_foreign = count_inv - count_inv_us

label variable count_inv_us "number of US inventors"
label variable count_inv "number of inventors"
label variable count_inv_foreign "number of foreign inventors"

save "${MY_OUT_PATH}deceased_citing_US.dta", replace

* One row per citing patent with the coordinates of its inventors side by side.
keep patent latitude longitude appl_date
rename (latitude longitude) (lat_citing lng_citing)

* Number the inventors of a patent in coordinate order; rows without
* coordinates sort last.
bysort patent (lat_citing lng_citing): generate index = _n

* numeric patent key for tsset / reshape
encode patent, generate(patent_num)
tsset patent_num index
reshape wide lat_citing lng_citing, i(patent_num) j(index)
drop patent_num
order patent
rename (patent appl_date) (citing appl_date_citing)
save "${MY_OUT_PATH}geo_citing.dta", replace
**********

**********
* Cited-Citing
**********
* Start from all cited-citing pairs; rows without a citing patent are removed.
use "${MY_OUT_PATH}deceased_cited_citing.dta", clear
order cited citing
drop if citing == ""

* restrictions
* Patent-level information of the cited patent drives the sample restrictions.
rename cited patent
merge m:1 patent using "${MY_OUT_PATH}patent_information.dta", keep(master match) nogenerate
rename patent cited
keep if count_non_US == 0 & count_no_ID == 0
keep if inrange(appl_year, 1976, 2005)
rename (appl_year appl_date grant_year grant_date) (appl_year_cited appl_date_cited grant_year_cited grant_date_cited)
drop deceased_patent count_inv count_non_US count_US all_US count_no_ID deceased_identified

merge m:1 cited using "${MY_OUT_PATH}geo_cited.dta", keep(match) nogenerate // only cited with all location information for all US inventors
merge m:1 citing using "${MY_OUT_PATH}geo_citing.dta", keep(master match) // _merge == 1 identifies the patents with no citation or no location information for the citing patent
drop if lat_citing1 == . & citing != "" // only patents with all location information for all US inventors
drop _merge

* Distance in miles between inventor 1 of the cited patent and each co-inventor.
forvalues k = 2/18 {
    geodist lat_cited1 lng_cited1 lat_cited`k' lng_cited`k', generate(dist_`k') miles
}
forvalues k = 2/18 {
    drop if dist_`k' == 0 // only patents with co-inventors living in a different city than the deceased inventor
}
drop inventor_id_cited19 - lng_cited23 lat_citing27 - lng_citing29 // drop empty variables

* calculate distances between inventors
* distance_i_j: miles between inventor i of the cited patent and inventor j of
* the citing patent
forvalues i = 1/18 {
    forvalues j = 1/26 {
        geodist lat_cited`i' lng_cited`i' lat_citing`j' lng_citing`j', generate(distance_`i'_`j') miles
    }
}

* age information
* For each inventor slot of the cited patent: look up the birth year by
* inventor id and compute the age in the application year of the cited patent.
* The id variable is renamed to the merge key for the lookup and renamed back.
forvalues k = 1/18 {
    rename inventor_id_cited`k' inventor_id
    merge m:1 inventor_id using "${MY_OUT_PATH}inv_age.dta", keep(1 3) keepusing(birthyear) nogen
    * Fix: the application year is stored as appl_year_cited at this point; the
    * former reference to "appl_year" resolved only through variable-name
    * abbreviation and stops working under "set varabbrev off" or as soon as a
    * second variable starting with appl_year exists.
    gen age_`k' = appl_year_cited - birthyear
    replace age_`k' = . if age_`k' < 10 // ages below 10 are set to missing
    drop birthyear
    rename inventor_id inventor_id_cited`k'
}

* number of applications
* For each inventor slot of the cited patent, fetch count_applications5 for the
* (patent, inventor) pair; the slot's id is renamed to the merge key and back.
rename cited patent
forvalues k = 1/18 {
    rename inventor_id_cited`k' inventor_id
    merge m:1 patent inventor_id using "${MY_OUT_PATH}inventor_information.dta", keep(master match) keepusing(count_applications5) nogenerate
    rename (count_applications5 inventor_id) (count_applications5_`k' inventor_id_cited`k')
}
rename patent cited 
order cited appl_date_cited appl_year_cited grant_date_cited grant_year_cited inventor_id_cited* dist_* age* count_applications5_* citing appl_date_citing distance_* self_cite examiner_cite 
save "${MY_OUT_PATH}geo_cited_citing.dta", replace // cited1 is the deceased inventor

* The raw coordinates are not part of the analysis sample.
drop lat_cited1 - lng_citing26

* variable labels
label variable cited "cited patent"
label variable citing "citing patent"
forvalues k = 2/18 {
    label variable dist_`k' "distance between deceased and co-inventors"
}
forvalues k = 1/18 {
    label variable age_`k' "age of cited inventor"
    forvalues j = 1/26 {
        label variable distance_`k'_`j' "distance between cited and citing inventor"
    }
}
label data "Balsmeier, Fleming, Lueck (2022): Isolating personal knowledge spillovers"
compress
save "${MY_OUT_PATH}analysis_sample.dta", replace // cited1 is the deceased inventor
**********************************************************************
exit
